"""
author：fc
date：  2021/10/4
"""
#
# Data preprocessing: min-max (range) normalization, x_new = (x - min) / (max - min); the maximum maps to 1 and the minimum maps to 0.
#
import numpy as np
import pymysql
import pandas as pd

# pd.set_option("display.max_columns",None)
def parse_comment_count(text) -> int:
    """Convert a comment-count label such as "2万+" or "5000+" to an integer.

    A trailing "+" is dropped, so the result is the lower bound the label
    states. A trailing "万" multiplies the number before it by 10,000, which
    also covers fractional labels like "1.5万+".

    Raises:
        ValueError: if the remaining text is not a number.
    """
    cleaned = str(text).strip().rstrip("+")
    if cleaned.endswith("万"):
        # round() guards against float artefacts such as 2.3 * 10000.
        return int(round(float(cleaned[:-1]) * 10000))
    return int(float(cleaned))


def min_max_scale(values):
    """Rescale a pandas Series to [0, 1] using (x - min) / (max - min).

    When every value is identical the range is zero; instead of producing
    NaN from 0 / 0, the column is mapped to 0.0.
    """
    low = values.min()
    span = values.max() - low
    if span == 0:
        return (values - low).astype(float)
    return (values - low) / span


if __name__ == "__main__":
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or environment variables.
    connect = pymysql.connect(host='localhost', user='root', passwd='root', db='datamine', autocommit=True)
    sql = "select * from jd"
    try:
        jd_books = pd.read_sql(sql, connect)
    finally:
        # Release the connection even if the query fails.
        connect.close()

    print("==========================jd_books=========================")
    jd_books['price'] = jd_books['price'].astype('float')
    jd_books['comment'] = jd_books['comment'].map(parse_comment_count).astype('int')
    print(jd_books)
    print(jd_books.describe())
    print("---------------------------over----------------------------------")

    # Min-max normalization removes the effect of differing units and value
    # ranges between the two columns.
    for column in ('comment', 'price'):
        jd_books[column] = min_max_scale(jd_books[column])
    print(jd_books.describe())